*** LIS Cross-National Data Center in Luxembourg

* email: usersupport@lisdatacenter.org 

*** LIS Self Teaching Package 2022
*** Part II: Gender, employment, and wages
*** Stata version

* last change of this version of the syntax: 15-01-2022.


** Exercise 4: Dependent employment and hourly wages

* Variables that make_data pulls from each household-level file
global varshh "hid own"

* Variables that make_data reads from each person-level file.
* The list is assembled in small chunks to keep every line short;
* the resulting string is one space-separated variable list.
global varspp "hid dname pwgt ppopwgt"
global varspp "$varspp relation partner ageyoch"
global varspp "$varspp age sex immigr educ educ_c"
global varspp "$varspp emp status1 ptime1 hwage1"

* Dataset codes that make_data and recode_data loop over
global datasets "us04 be04 gr04"

* make_data: build the pooled working file exercise2_LIS in the mydata folder.
* For every dataset code listed in the datasets global it
*   1. reads the varspp variables from the person-level file,
*   2. merges the varshh variables from the household-level file on hid,
*   3. keeps observations aged 25 to 54 with relation<=2200
*      (presumably head and spouse/partner codes; confirm in the LIS codebook),
*   4. stacks the result onto the datasets processed so far and saves it.
* Takes no arguments. Relies on globals defined outside the program:
* datasets, varspp, varshh, mydata and one person/household file global
* per dataset code (for example us04p and us04h).
* Dropping any earlier definition lets the do-file be re-run in one session.
capture program drop make_data
program define make_data
    * The first pass is recognised by position, not by a dataset name, so the
    * datasets list can be reordered or changed without appending stale data.
    local first 1
    foreach ccyy in $datasets {
        use $varspp using $`ccyy'p, clear
        merge m:1 hid using $`ccyy'h, keepusing($varshh)
        keep if inrange(age,25,54) & relation<=2200
        * From the second dataset on, add the rows saved by earlier passes
        if !`first' {
            append using "${mydata}exercise2_LIS"
        }
        * Path is quoted so a mydata folder containing spaces still works
        save "${mydata}exercise2_LIS", replace
        local first 0
    }
end

* recode_data: derive the analysis variables on the data currently in memory.
* Creates
*   depemp       1 if status1 is 100-120, 0 if 200-240, missing otherwise
*   hourwage     hwage1 with negatives set to 0, then top and bottom coded
*   hourwage_log log of hourwage before top and bottom coding
*   iqr, upper_bound, lower_bound   per-dataset bounds on the log scale
* Takes no arguments. Expects status1, hwage1, ppopwgt and dname in memory and
* the datasets global to list the dname values to process.
* Dropping any earlier definition lets the do-file be re-run in one session.
capture program drop recode_data
program define recode_data
    recode status1 (100/120=1) (200/240=0) (else=.), gen(depemp)
    label define depempl 0 "not in dependent employment" 1 "in dependent employment"
    label values depemp depempl

    gen hourwage = hwage1
    replace hourwage=0 if hwage1<0
    gen hourwage_log=log(hourwage)
    * log(0) is missing; set it to 0 so that zero (and former negative) wages
    * stay in the distribution of non-missing hourly wages
    replace hourwage_log=0 if hourwage_log==. & hourwage!=.

    * Create the bound variables once, before the loop, so that the loop does
    * not depend on which dataset code happens to come first in the list
    gen iqr = .
    gen upper_bound = .
    gen lower_bound = .

    foreach ccyy in $datasets {
        * Weighted quartiles of log hourly wage within this dataset
        sum hourwage_log [aw=ppopwgt] if dname=="`ccyy'", de
        replace iqr=r(p75)-r(p25) if dname=="`ccyy'"
        * Bounds for extreme values: 3 times the IQR beyond the quartiles
        replace upper_bound=r(p75) + (iqr * 3) if dname=="`ccyy'"
        replace lower_bound=r(p25) - (iqr * 3) if dname=="`ccyy'"
        * Top code hourly wage at the upper bound
        replace hourwage=exp(upper_bound) if hourwage>exp(upper_bound) & !mi(hourwage) & dname=="`ccyy'"
        * Bottom code hourly wage at the lower bound
        replace hourwage=exp(lower_bound) if hourwage<exp(lower_bound) & !mi(hourwage) & dname=="`ccyy'"
    }
end

* get_descriptives: print the descriptive tables for the data in memory.
*   1. Per dname: weighted row percentages of depemp by sex, restricted to
*      observations with emp==1 (presumably the employed; confirm in codebook).
*   2. Per dname and sex: weighted detailed summary of hourwage.
* Takes no arguments and creates no variables; it sorts the data by dname and
* sex as a side effect of bysort. Expects dname, sex, depemp, emp, hourwage
* and ppopwgt in memory.
* Dropping any earlier definition lets the do-file be re-run in one session.
capture program drop get_descriptives
program define get_descriptives
    bysort dname: tabulate sex depemp [aw=ppopwgt] if emp==1, row nofreq
    bysort dname sex: summarize hourwage [aw=ppopwgt], detail
end

* Driver: build the pooled file, load it, derive the variables, print results.
* The order matters: recode_data and get_descriptives work on the data in memory.
quietly make_data
* Path is quoted so a mydata folder containing spaces still works
use "${mydata}exercise2_LIS", clear
quietly recode_data
get_descriptives
